import pandas as pd
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
# Sanity check: reaching this line means every import above succeeded.
print('modlues are imported')
modlues are imported
# Country-level daily aggregates, read straight from the public GitHub CSV.
dataset_url = 'https://raw.githubusercontent.com/datasets/covid-19/master/data/countries-aggregated.csv'
df = pd.read_csv(filepath_or_buffer=dataset_url)
# Preview the first five rows.
df.head(5)
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 0 | 2020-01-22 | Afghanistan | 0 | 0 | 0 |
| 1 | 2020-01-23 | Afghanistan | 0 | 0 | 0 |
| 2 | 2020-01-24 | Afghanistan | 0 | 0 | 0 |
| 3 | 2020-01-25 | Afghanistan | 0 | 0 | 0 |
| 4 | 2020-01-26 | Afghanistan | 0 | 0 | 0 |
# Last five rows of the loaded dataset.
df.tail()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 161563 | 2022-04-12 | Zimbabwe | 247094 | 0 | 5460 |
| 161564 | 2022-04-13 | Zimbabwe | 247160 | 0 | 5460 |
| 161565 | 2022-04-14 | Zimbabwe | 247208 | 0 | 5462 |
| 161566 | 2022-04-15 | Zimbabwe | 247237 | 0 | 5462 |
| 161567 | 2022-04-16 | Zimbabwe | 247237 | 0 | 5462 |
# (rows, columns) of the dataset before any filtering.
df.shape
(161568, 5)
# Drop every row that has no confirmed cases yet.
has_confirmed_cases = df['Confirmed'].gt(0)
df = df[has_confirmed_cases]
After this filter, each country's data starts on the date its first confirmed COVID-19 case was recorded.
# First five rows after removing the zero-case rows.
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
# (rows, columns) after removing the zero-case rows.
df.shape
(148455, 5)
# Show only the rows reported for China.
df[df['Country'].eq('China')]
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 30192 | 2020-01-22 | China | 548 | 28 | 17 |
| 30193 | 2020-01-23 | China | 643 | 30 | 18 |
| 30194 | 2020-01-24 | China | 920 | 36 | 26 |
| 30195 | 2020-01-25 | China | 1406 | 39 | 42 |
| 30196 | 2020-01-26 | China | 2075 | 49 | 56 |
| ... | ... | ... | ... | ... | ... |
| 31003 | 2022-04-12 | China | 1655477 | 0 | 13524 |
| 31004 | 2022-04-13 | China | 1681437 | 0 | 13586 |
| 31005 | 2022-04-14 | China | 1705231 | 0 | 13640 |
| 31006 | 2022-04-15 | China | 1759128 | 0 | 13707 |
| 31007 | 2022-04-16 | China | 1760211 | 0 | 13748 |
816 rows × 5 columns
COVID-19 data for China
# Animated world map: one frame per date, countries coloured by confirmed cases.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Confirmed',
    animation_frame='Date',
)
fig.update_layout(title_text="Global spread of Covid-19")
fig.show()
# Animated world map: one frame per date, countries coloured by cumulative deaths.
fig = px.choropleth(
    df,
    locations='Country',
    locationmode='country names',
    color='Deaths',
    animation_frame='Date',
)
fig.update_layout(title_text="Global death from Covid-19")
fig.show()
# Keep only the rows reported for India.
is_india = df['Country'].eq('India')
df_india = df[is_india]
df_india.head(5)
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 65288 | 2020-01-30 | India | 1 | 0 | 0 |
| 65289 | 2020-01-31 | India | 1 | 0 | 0 |
| 65290 | 2020-02-01 | India | 1 | 0 | 0 |
| 65291 | 2020-02-02 | India | 2 | 0 | 0 |
| 65292 | 2020-02-03 | India | 3 | 0 | 0 |
# Keep just the columns needed for the infection-rate calculation.
# .copy() detaches the result from the filtered frame it was sliced from, so
# adding a column to df_india later is an ordinary assignment rather than a
# chained assignment on a derived slice.
df_india = df_india[['Date', 'Confirmed']].copy()
df_india.head()
| Date | Confirmed | |
|---|---|---|
| 65288 | 2020-01-30 | 1 |
| 65289 | 2020-01-31 | 1 |
| 65290 | 2020-02-01 | 1 |
| 65291 | 2020-02-02 | 2 |
| 65292 | 2020-02-03 | 3 |
Calculating the first difference (day-over-day change) of the Confirmed column
# Day-over-day change in cumulative confirmed cases; the first row has no
# predecessor, so its value is NaN.
daily_new_cases = df_india['Confirmed'].diff()
df_india['Infection Rate'] = daily_new_cases
df_india.head(10)
| Date | Confirmed | Infection Rate | |
|---|---|---|---|
| 65288 | 2020-01-30 | 1 | NaN |
| 65289 | 2020-01-31 | 1 | 0.0 |
| 65290 | 2020-02-01 | 1 | 0.0 |
| 65291 | 2020-02-02 | 2 | 1.0 |
| 65292 | 2020-02-03 | 3 | 1.0 |
| 65293 | 2020-02-04 | 3 | 0.0 |
| 65294 | 2020-02-05 | 3 | 0.0 |
| 65295 | 2020-02-06 | 3 | 0.0 |
| 65296 | 2020-02-07 | 3 | 0.0 |
| 65297 | 2020-02-08 | 3 | 0.0 |
According to the data, the first confirmed COVID-19 case in India was recorded on 30 January 2020.
# Cumulative confirmed cases and their daily change for India on one chart.
px.line(
    df_india,
    x='Date',
    y=['Confirmed', 'Infection Rate'],
)
# Largest single-day increase in confirmed cases.
df_india['Infection Rate'].max()
414188.0
At its peak, India recorded 414,188 new confirmed cases in a single day.
# Keep only the rows reported for China.
is_china = df['Country'].eq('China')
df_china = df[is_china]
df_china.head(5)
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 30192 | 2020-01-22 | China | 548 | 28 | 17 |
| 30193 | 2020-01-23 | China | 643 | 30 | 18 |
| 30194 | 2020-01-24 | China | 920 | 36 | 26 |
| 30195 | 2020-01-25 | China | 1406 | 39 | 42 |
| 30196 | 2020-01-26 | China | 2075 | 49 | 56 |
# Keep just the columns needed for the infection-rate calculation.
# .copy() detaches the result from the filtered frame it was sliced from, so
# adding a column to df_china later is an ordinary assignment rather than a
# chained assignment on a derived slice.
df_china = df_china[['Date', 'Confirmed']].copy()
df_china.head()
| Date | Confirmed | |
|---|---|---|
| 30192 | 2020-01-22 | 548 |
| 30193 | 2020-01-23 | 643 |
| 30194 | 2020-01-24 | 920 |
| 30195 | 2020-01-25 | 1406 |
| 30196 | 2020-01-26 | 2075 |
# Day-over-day change in cumulative confirmed cases; the first row has no
# predecessor, so its value is NaN.
daily_new_cases = df_china['Confirmed'].diff()
df_china['Infection Rate'] = daily_new_cases
df_china.head(10)
| Date | Confirmed | Infection Rate | |
|---|---|---|---|
| 30192 | 2020-01-22 | 548 | NaN |
| 30193 | 2020-01-23 | 643 | 95.0 |
| 30194 | 2020-01-24 | 920 | 277.0 |
| 30195 | 2020-01-25 | 1406 | 486.0 |
| 30196 | 2020-01-26 | 2075 | 669.0 |
| 30197 | 2020-01-27 | 2877 | 802.0 |
| 30198 | 2020-01-28 | 5509 | 2632.0 |
| 30199 | 2020-01-29 | 6087 | 578.0 |
| 30200 | 2020-01-30 | 8141 | 2054.0 |
| 30201 | 2020-01-31 | 9802 | 1661.0 |
According to the data, China had already reported 548 confirmed cases on 22 Jan 2020, the first date in this dataset.
# Cumulative confirmed cases and their daily change for China on one chart.
px.line(
    df_china,
    x='Date',
    y=['Confirmed', 'Infection Rate'],
)
# Largest single-day increase in confirmed cases.
df_china['Infection Rate'].max()
77402.0
At its peak, China recorded 77,402 new confirmed cases in a single day.
# Largest day-over-day increase in confirmed cases for every country.
# One groupby pass replaces re-filtering the whole frame once per country.
# sort=False keeps countries in order of first appearance, which is the order
# df['Country'].unique() produced in the loop-based version.
peak_by_country = (
    df.groupby('Country', sort=False)['Confirmed']
    .agg(lambda confirmed: confirmed.diff().max())
)

# Keep the intermediate lists under their original names.
countries = list(peak_by_country.index)
max_infection_rate = list(peak_by_country)

df_MIR = pd.DataFrame(
    {
        'Country': countries,
        'Max Infection Rate': max_infection_rate,
    }
)
df_MIR.head(10)
| Country | Max Infection Rate | |
|---|---|---|
| 0 | Afghanistan | 3243.0 |
| 1 | Albania | 4789.0 |
| 2 | Algeria | 2521.0 |
| 3 | Andorra | 2313.0 |
| 4 | Angola | 5035.0 |
| 5 | Antarctica | 0.0 |
| 6 | Antigua and Barbuda | 468.0 |
| 7 | Argentina | 139853.0 |
| 8 | Armenia | 4388.0 |
| 9 | Australia | 175271.0 |
# One bar per country showing its peak daily increase, on a linear y-axis.
px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    color='Country',
    title='Global maximum infection rate',
)
The US is the country with the highest maximum infection rate, followed by the UK and Turkey.
# Same chart with a logarithmic y-axis so that small values stay visible.
px.bar(
    df_MIR,
    x='Country',
    y='Max Infection Rate',
    color='Country',
    title='Global maximum infection rate expanded',
    log_y=True,
)
On March 24, 2020, Prime Minister Narendra Modi called for a complete lockdown of the entire nation for 21 days in an effort to contain the COVID-19 pandemic. In a televised address to the nation, the PM said that even those nations with the best medical facilities could not contain the virus and that social distancing is the only option to mitigate it. He said this decision was taken from the experiences of health sector experts and experiences of other countries and that 21 days is essential to break the chain of infection.
Source https://economictimes.indiatimes.com/defaultinterstitial.cms
# Reference dates (ISO format, matching the 'Date' column) used to mark the
# India lockdown on the charts below.
india_lockdown_start_date = "2020-03-24"        # nationwide lockdown announced
india_lockdown_a_month_later = "2020-04-24"     # one month after the start
india_lockdown_six_months_later = "2020-09-24"  # six months after the start
# Re-inspect the filtered global frame before subsetting it again.
df.head()
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 33 | 2020-02-24 | Afghanistan | 5 | 0 | 0 |
| 34 | 2020-02-25 | Afghanistan | 5 | 0 | 0 |
| 35 | 2020-02-26 | Afghanistan | 5 | 0 | 0 |
| 36 | 2020-02-27 | Afghanistan | 5 | 0 | 0 |
| 37 | 2020-02-28 | Afghanistan | 5 | 0 | 0 |
# Rebuild the India subset, this time keeping every column.
is_india = df['Country'].eq('India')
df_india = df[is_india]
df_india.head(5)
| Date | Country | Confirmed | Recovered | Deaths | |
|---|---|---|---|---|---|
| 65288 | 2020-01-30 | India | 1 | 0 | 0 |
| 65289 | 2020-01-31 | India | 1 | 0 | 0 |
| 65290 | 2020-02-01 | India | 1 | 0 | 0 |
| 65291 | 2020-02-02 | India | 2 | 0 | 0 |
| 65292 | 2020-02-03 | India | 3 | 0 | 0 |
# df_india was produced by filtering df, so assigning a column to it directly
# triggers pandas' SettingWithCopyWarning. Take an independent copy first.
df_india = df_india.copy()
# Day-over-day change in cumulative confirmed cases (NaN for the first row).
df_india['Infection Rate'] = df_india['Confirmed'].diff()
df_india.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 65288 | 2020-01-30 | India | 1 | 0 | 0 | NaN |
| 65289 | 2020-01-31 | India | 1 | 0 | 0 | 0.0 |
| 65290 | 2020-02-01 | India | 1 | 0 | 0 | 0.0 |
| 65291 | 2020-02-02 | India | 2 | 0 | 0 | 1.0 |
| 65292 | 2020-02-03 | India | 3 | 0 | 0 | 1.0 |
# India's daily infection rate with vertical markers at the lockdown start
# and one month after it.
peak_rate = df_india['Infection Rate'].max()

fig = px.line(
    df_india,
    x='Date',
    y='Infection Rate',
    title='Before and after lockdown in India ',
)

# Red marker: lockdown start, drawn from 0 up to the series maximum.
fig.add_shape(
    type='line',
    x0=india_lockdown_start_date,
    y0=0,
    x1=india_lockdown_start_date,
    y1=peak_rate,
    line={'color': 'red', 'width': 2},
)
fig.add_annotation(
    x=india_lockdown_start_date,
    y=peak_rate,
    text='starting date of the lockdown',
)

# Orange marker: one month after the start; its label sits on the x-axis.
fig.add_shape(
    type='line',
    x0=india_lockdown_a_month_later,
    y0=0,
    x1=india_lockdown_a_month_later,
    y1=peak_rate,
    line={'color': 'orange', 'width': 2},
)
# Kept as the final expression of the block, as in the original cell.
fig.add_annotation(
    x=india_lockdown_a_month_later,
    y=0,
    text='1 month of the lockdown',
)
# India's daily infection rate with vertical markers at the lockdown start
# and six months after it.
peak_rate = df_india['Infection Rate'].max()

fig = px.line(
    df_india,
    x='Date',
    y='Infection Rate',
    title='Before and after lockdown in India ',
)

# Red marker: lockdown start, drawn from 0 up to the series maximum.
fig.add_shape(
    type='line',
    x0=india_lockdown_start_date,
    y0=0,
    x1=india_lockdown_start_date,
    y1=peak_rate,
    line={'color': 'red', 'width': 2},
)
fig.add_annotation(
    x=india_lockdown_start_date,
    y=peak_rate,
    text='starting date of the lockdown',
)

# Orange marker: six months after the start; its label sits on the x-axis.
fig.add_shape(
    type='line',
    x0=india_lockdown_six_months_later,
    y0=0,
    x1=india_lockdown_six_months_later,
    y1=peak_rate,
    line={'color': 'orange', 'width': 2},
)
# Kept as the final expression of the block, as in the original cell.
fig.add_annotation(
    x=india_lockdown_six_months_later,
    y=0,
    text='6 months of the lockdown',
)
# Re-inspect the India frame before adding the death-rate column.
df_india.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | |
|---|---|---|---|---|---|---|
| 65288 | 2020-01-30 | India | 1 | 0 | 0 | NaN |
| 65289 | 2020-01-31 | India | 1 | 0 | 0 | 0.0 |
| 65290 | 2020-02-01 | India | 1 | 0 | 0 | 0.0 |
| 65291 | 2020-02-02 | India | 2 | 0 | 0 | 1.0 |
| 65292 | 2020-02-03 | India | 3 | 0 | 0 | 1.0 |
# df_india originates from a filtered slice of df; copy it so that the column
# assignment below does not raise pandas' SettingWithCopyWarning.
df_india = df_india.copy()
# Day-over-day change in cumulative deaths (NaN for the first row).
df_india['Death Rate'] = df_india['Deaths'].diff()
df_india.head()
| Date | Country | Confirmed | Recovered | Deaths | Infection Rate | Death Rate | |
|---|---|---|---|---|---|---|---|
| 65288 | 2020-01-30 | India | 1 | 0 | 0 | NaN | NaN |
| 65289 | 2020-01-31 | India | 1 | 0 | 0 | 0.0 | 0.0 |
| 65290 | 2020-02-01 | India | 1 | 0 | 0 | 0.0 | 0.0 |
| 65291 | 2020-02-02 | India | 2 | 0 | 0 | 1.0 | 0.0 |
| 65292 | 2020-02-03 | India | 3 | 0 | 0 | 1.0 | 0.0 |
# Daily infection and death rates for India on a shared axis (raw values).
rate_columns = ['Infection Rate', 'Death Rate']
fig = px.line(df_india, x='Date', y=rate_columns)
fig.show()
# Divide each rate series by its own maximum so both peak at 1 and can be
# compared on one axis. Copy first: df_india originates from a filtered slice
# of df, and assigning to it directly raises pandas' SettingWithCopyWarning.
df_india = df_india.copy()
for rate_column in ('Infection Rate', 'Death Rate'):
    df_india[rate_column] = df_india[rate_column] / df_india[rate_column].max()
# Infection and death rates for India with vertical markers at the lockdown
# start and one month after it.
peak_rate = df_india['Infection Rate'].max()

fig = px.line(
    df_india,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
)

# Red marker: lockdown start, drawn from 0 up to the infection-rate maximum.
fig.add_shape(
    type='line',
    x0=india_lockdown_start_date,
    y0=0,
    x1=india_lockdown_start_date,
    y1=peak_rate,
    line={'color': 'red', 'width': 2},
)
fig.add_annotation(
    x=india_lockdown_start_date,
    y=peak_rate,
    text='starting date of the lockdown',
)

# Orange marker: one month after the start; its label sits on the x-axis.
fig.add_shape(
    type='line',
    x0=india_lockdown_a_month_later,
    y0=0,
    x1=india_lockdown_a_month_later,
    y1=peak_rate,
    line={'color': 'orange', 'width': 2},
)
# Kept as the final expression of the block, as in the original cell.
fig.add_annotation(
    x=india_lockdown_a_month_later,
    y=0,
    text='1 month of the lockdown',
)
# Infection and death rates for India with vertical markers at the lockdown
# start and six months after it.
peak_rate = df_india['Infection Rate'].max()

fig = px.line(
    df_india,
    x='Date',
    y=['Infection Rate', 'Death Rate'],
)

# Red marker: lockdown start, drawn from 0 up to the infection-rate maximum.
fig.add_shape(
    type='line',
    x0=india_lockdown_start_date,
    y0=0,
    x1=india_lockdown_start_date,
    y1=peak_rate,
    line={'color': 'red', 'width': 2},
)
fig.add_annotation(
    x=india_lockdown_start_date,
    y=peak_rate,
    text='starting date of the lockdown',
)

# Orange marker: six months after the start; its label sits on the x-axis.
fig.add_shape(
    type='line',
    x0=india_lockdown_six_months_later,
    y0=0,
    x1=india_lockdown_six_months_later,
    y1=peak_rate,
    line={'color': 'orange', 'width': 2},
)
# Kept as the final expression of the block, as in the original cell.
fig.add_annotation(
    x=india_lockdown_six_months_later,
    y=0,
    text='6 months of the lockdown',
)